﻿using System;
using System.Collections.Generic;
using System.Collections.ObjectModel;
using System.ComponentModel.Composition;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using WordNet.Core.Extensions;

namespace WordNet.Core.Morphy
{
    // A simple stemmer: it applies the rules of detachment defined by its lists of
    // inflectional suffixes and replacement endings.
    [Export(typeof(IStemmer))]
    [PartCreationPolicy(CreationPolicy.NonShared)]
    public class SimpleStemmer : IStemmer
    {
        // Inflectional suffixes and the endings that replace them.
        // SetUp() pairs a suffix with an ending to form each stemming rule: if a word ends
        // with the suffix, the suffix is stripped from the word and the paired ending is added.
        // Separator between the words of a collocation (see FindStems).
        public const String underscore = "_";
        // Suffixes, in alphabetical order. SUFFIX_ful is handled separately by StripNounSuffix,
        // and SUFFIX_ss is only passed as the extra argument of the noun "s" rule in SetUp().
        public const String SUFFIX_ches = "ches";
        public const String SUFFIX_ed = "ed";
        public const String SUFFIX_es = "es";
        public const String SUFFIX_est = "est";
        public const String SUFFIX_er = "er";
        public const String SUFFIX_ful = "ful";
        public const String SUFFIX_ies = "ies";
        public const String SUFFIX_ing = "ing";
        public const String SUFFIX_men = "men";
        public const String SUFFIX_s = "s";
        public const String SUFFIX_ss = "ss";
        public const String SUFFIX_ses = "ses";
        public const String SUFFIX_shes = "shes";
        public const String SUFFIX_xes = "xes";
        public const String SUFFIX_zes = "zes";

        // Replacement endings. ENDING_null means the suffix is removed and nothing is added.
        public const String ENDING_null = "";
        public const String ENDING_ch = "ch";
        public const String ENDING_e = "e";
        public const String ENDING_man = "man";
        public const String ENDING_s = SUFFIX_s;
        public const String ENDING_sh = "sh";
        public const String ENDING_x = "x";
        public const String ENDING_y = "y";
        public const String ENDING_z = "z";

        // Rule lists keyed by part of speech; assigned once by the static constructor.
        static readonly ReadOnlyDictionary<POS, List<StemmingRule>> ruleMap;

        /// <summary>
        /// Builds the table of stemming rules, grouped by part of speech.
        /// </summary>
        /// <returns>
        /// A read-only dictionary holding one rule list each for NOUN, VERB, ADJECTIVE and ADVERB;
        /// the adverb list is empty.
        /// </returns>
        static ReadOnlyDictionary<POS, List<StemmingRule>> SetUp()
        {
            // Typed null passed as the last constructor argument of every rule except the noun
            // "s" rule (presumably "no suffixes to exclude" -- confirm against StemmingRule).
            String[] noExtras = null;

            List<StemmingRule> nounRules = new List<StemmingRule>(8)
            {
                new StemmingRule(SUFFIX_s, ENDING_null, POS.NOUN, SUFFIX_ss),
                new StemmingRule(SUFFIX_ses, ENDING_s, POS.NOUN, noExtras),
                new StemmingRule(SUFFIX_xes, ENDING_x, POS.NOUN, noExtras),
                new StemmingRule(SUFFIX_zes, ENDING_z, POS.NOUN, noExtras),
                new StemmingRule(SUFFIX_ches, ENDING_ch, POS.NOUN, noExtras),
                new StemmingRule(SUFFIX_shes, ENDING_sh, POS.NOUN, noExtras),
                new StemmingRule(SUFFIX_men, ENDING_man, POS.NOUN, noExtras),
                new StemmingRule(SUFFIX_ies, ENDING_y, POS.NOUN, noExtras)
            };

            // Most verb suffixes appear twice: once restoring a final "e", once restoring nothing.
            List<StemmingRule> verbRules = new List<StemmingRule>(8)
            {
                new StemmingRule(SUFFIX_s, ENDING_null, POS.VERB, noExtras),
                new StemmingRule(SUFFIX_ies, ENDING_y, POS.VERB, noExtras),
                new StemmingRule(SUFFIX_es, ENDING_e, POS.VERB, noExtras),
                new StemmingRule(SUFFIX_es, ENDING_null, POS.VERB, noExtras),
                new StemmingRule(SUFFIX_ed, ENDING_e, POS.VERB, noExtras),
                new StemmingRule(SUFFIX_ed, ENDING_null, POS.VERB, noExtras),
                new StemmingRule(SUFFIX_ing, ENDING_e, POS.VERB, noExtras),
                new StemmingRule(SUFFIX_ing, ENDING_null, POS.VERB, noExtras)
            };

            List<StemmingRule> adjectiveRules = new List<StemmingRule>(4)
            {
                new StemmingRule(SUFFIX_er, ENDING_e, POS.ADJECTIVE, noExtras),
                new StemmingRule(SUFFIX_er, ENDING_null, POS.ADJECTIVE, noExtras),
                new StemmingRule(SUFFIX_est, ENDING_e, POS.ADJECTIVE, noExtras),
                new StemmingRule(SUFFIX_est, ENDING_null, POS.ADJECTIVE, noExtras)
            };

            Dictionary<POS, List<StemmingRule>> table = new Dictionary<POS, List<StemmingRule>>
            {
                { POS.NOUN, nounRules },
                { POS.VERB, verbRules },
                { POS.ADJECTIVE, adjectiveRules },
                // adverbs have no rules
                { POS.ADVERB, new List<StemmingRule>() }
            };

            return new ReadOnlyDictionary<POS, List<StemmingRule>>(table);
        }

        // Type initializer: builds the shared rule table once via SetUp() and stores it
        // in the static read-only ruleMap field.
        static SimpleStemmer()
        {
            ruleMap = SetUp();
        }

        // Creates a stemmer. The body is empty; the rule table is static and built by the type initializer.
        public SimpleStemmer() { }

        // Returns the static rule table (part of speech -> list of stemming rules).
        // The dictionary wrapper is read-only, but the contained lists are ordinary
        // List<StemmingRule> instances shared by every caller.
        public ReadOnlyDictionary<POS, List<StemmingRule>> getRuleMap()
        {
            return ruleMap;
        }

        /// <summary>
        /// Finds candidate stems (base forms) of a word for the given part of speech.
        /// </summary>
        /// <param name="word">
        /// The surface form to stem. It is lower-cased and passed through WhitespaceToUnderscore
        /// before any rule is applied; a word that then contains an underscore is treated as a
        /// collocation when stemming nouns and verbs.
        /// </param>
        /// <param name="pos">
        /// The part of speech whose rules apply, or null to collect the stems for every value
        /// returned by POS.Values().
        /// </param>
        /// <returns>The candidate stems; empty when no rule applies. Adverbs always yield an empty list.</returns>
        /// <exception cref="ArgumentNullException">word is null.</exception>
        /// <exception cref="ArgumentException">pos has a name other than noun, verb, adjective or adverb.</exception>
        public virtual List<string> FindStems(string word, POS pos)
        {
            if (word == null)
                throw new ArgumentNullException("word");

            // Invariant lower-casing keeps the result independent of the current culture
            // (under tr-TR, for example, ToLower() maps "I" to a dotless "ı").
            word = word.ToLowerInvariant();
            word = word.WhitespaceToUnderscore();

            // if pos is null, do all
            if (pos == null)
            {
                List<String> combined = new List<String>();
                foreach (POS p in POS.Values())
                    combined.AddRange(FindStems(word, p));

                return combined;
            }

            bool isCollocation = word.Contains(underscore);

            switch (pos.Name)
            {
                case "noun":
                    return isCollocation ? GetNounCollocationRoots(word) : StripNounSuffix(word);
                case "verb":
                    // here we check for composites
                    return isCollocation ? GetVerbCollocationRoots(word) : StripVerbSuffix(word);
                case "adjective":
                    return StripAdjectiveSuffix(word);
                case "adverb":
                    // there are no adverb rules
                    return new List<string>();
                default:
                    throw new ArgumentException("Unsupported part of speech: " + pos.Name, "pos");
            }
        }

        /// <summary>
        /// Stems a noun collocation (words separated by underscores) by stemming each word as a
        /// noun and joining every combination of the per-word alternatives with underscores.
        /// </summary>
        /// <param name="composite">The collocation, with its words separated by underscores.</param>
        /// <returns>
        /// Every combination in which each word is replaced by one of its stems, or kept unchanged
        /// when it has none; an empty list when composite has fewer than two words.
        /// </returns>
        private List<String> GetNounCollocationRoots(String composite)
        {
            // split into parts, dropping empty segments produced by repeated underscores
            String[] parts = composite.Split(new string[] { underscore }, StringSplitOptions.RemoveEmptyEntries);
            if (parts.Length < 2)
                return new List<string>();

            // Combinations built so far from parts[0..i-1]; null until the first part is processed.
            List<String> combinations = null;
            for (int i = 0; i < parts.Length; i++)
            {
                List<String> alternatives = FindStems(parts[i], POS.NOUN);

                // A word without stems stays in the result unchanged. This must also hold for the
                // FIRST word: FindStems reports "no stems" as an empty list (e.g. for any word of
                // two characters or fewer, such as "x" in "x_rays"), and seeding from that empty
                // list used to discard every combination.
                if (alternatives == null || alternatives.Count == 0)
                    alternatives = new List<String> { parts[i] };

                if (combinations == null)
                {
                    combinations = new List<String>(alternatives);
                    continue;
                }

                // cross product: every existing prefix extended by every alternative
                List<String> extended = new List<String>(combinations.Count * alternatives.Count);
                foreach (String prefix in combinations)
                {
                    foreach (String alternative in alternatives)
                        extended.Add(prefix + underscore + alternative);
                }
                combinations = extended;
            }

            // make sure to remove empties
            List<String> result = new List<String>(combinations.Count);
            foreach (String combination in combinations)
            {
                String root = combination.Trim();
                if (root.Length != 0)
                    result.Add(root);
            }

            return result;
        }

        /// <summary>
        /// Applies every noun rule to a single word and collects the resulting stems.
        /// </summary>
        /// <param name="noun">The word to stem.</param>
        /// <returns>
        /// The non-empty stems produced by the noun rules, in rule order; an empty list when no
        /// rule produces one or when the word has two characters or fewer.
        /// </returns>
        private List<String> StripNounSuffix(String noun)
        {
            if (noun.Length <= 2)
                return new List<string>();

            // Strip off a trailing "ful" and hand it to the rules separately. The comparison is
            // ordinal so that the match always covers exactly the last three characters, which
            // the Substring call below relies on.
            String word = noun;
            String suffix = null;
            if (noun.EndsWith(SUFFIX_ful, StringComparison.Ordinal))
            {
                word = noun.Substring(0, noun.Length - SUFFIX_ful.Length);
                suffix = SUFFIX_ful;
            }

            // we will return this to the caller
            List<String> result = new List<String>();

            // apply the rules; suffix is null unless "ful" was detached above
            foreach (StemmingRule rule in getRuleMap()[POS.NOUN])
            {
                String root = rule.AddSuffixToEnd(word, suffix);
                if (!String.IsNullOrEmpty(root))
                    result.Add(root);
            }

            return result;
        }

        /// <summary>
        /// Applies every verb rule to a single word and collects the resulting stems.
        /// </summary>
        /// <param name="verb">The word to stem.</param>
        /// <returns>
        /// The non-empty stems produced by the verb rules, in rule order; an empty list when no
        /// rule produces one or when the word has two characters or fewer.
        /// </returns>
        private List<String> StripVerbSuffix(String verb)
        {
            List<String> stems = new List<String>();

            // words of one or two characters are never stemmed
            if (verb.Length > 2)
            {
                List<StemmingRule> verbRules = getRuleMap()[POS.VERB];
                for (int k = 0; k < verbRules.Count; k++)
                {
                    // a rule that yields null or an empty string contributes nothing
                    String candidate = verbRules[k].ApplyTo(verb);
                    if (String.IsNullOrEmpty(candidate))
                        continue;

                    stems.Add(candidate);
                }
            }

            return stems;
        }

        /// <summary>
        /// Stems a verb collocation (words separated by underscores). Each candidate replaces
        /// exactly one word by one of its verb stems and leaves every other word unchanged.
        /// </summary>
        /// <param name="composite">The collocation, with its words separated by underscores.</param>
        /// <returns>
        /// The candidates, ordered by word position and then by stem order; an empty list when
        /// composite has fewer than two words or no word has a stem.
        /// </returns>
        private List<String> GetVerbCollocationRoots(String composite)
        {
            // split into parts, dropping empty segments produced by repeated underscores
            String[] parts = composite.Split(new string[] { underscore }, StringSplitOptions.RemoveEmptyEntries);
            if (parts.Length < 2)
                return new List<string>();

            // find the stems of each part
            List<List<String>> rootSets = new List<List<String>>(parts.Length);
            for (int i = 0; i < parts.Length; i++)
                rootSets.Add(FindStems(parts[i], POS.VERB));

            List<String> result = new List<String>();

            // Working copy of the words; one slot at a time is swapped for a stem.
            String[] candidateParts = (String[])parts.Clone();
            for (int i = 0; i < parts.Length; i++)
            {
                // FindStems is virtual, so tolerate an override that returns null
                if (rootSets[i] == null)
                    continue;

                foreach (String partRoot in rootSets[i])
                {
                    candidateParts[i] = partRoot;
                    String candidate = String.Join(underscore, candidateParts);

                    // Drop empties here, on the list that is actually returned (the previous
                    // code filtered into a separate list and then discarded it).
                    if (candidate.Length != 0)
                        result.Add(candidate);
                }

                // restore the original word before moving to the next position
                candidateParts[i] = parts[i];
            }

            return result;
        }

        /// <summary>
        /// Applies every adjective rule to a single word and collects the resulting stems.
        /// </summary>
        /// <param name="adj">The word to stem.</param>
        /// <returns>
        /// The non-empty stems produced by the adjective rules, in rule order; an empty list
        /// when no rule produces one.
        /// </returns>
        private List<String> StripAdjectiveSuffix(String adj)
        {
            // run each rule, discard null/empty outcomes, and hand back a fresh list
            return getRuleMap()[POS.ADJECTIVE]
                .Select<StemmingRule, String>(rule => rule.ApplyTo(adj))
                .Where(stem => !String.IsNullOrEmpty(stem))
                .ToList();
        }

    }
}
